###################
# This script is modified based on: https://github.com/sindhura97/STraTS
###################


import pandas as pd
from tqdm import tqdm
import pickle
import numpy as np

# mimic_data_dir = 'path/to/mimic-iii-clinical-database-1.4/'
mimic_data_dir = '/home/covpreduser/Blob/shunzhen/Healthcare/mimic-iii-clinical-database-1.4/'

# Get all ICU stays.
icu = pd.read_csv(mimic_data_dir+'ICUSTAYS.csv', usecols=['SUBJECT_ID', 'HADM_ID', 'ICUSTAY_ID', 'INTIME', 'OUTTIME'])
icu = icu.loc[icu.INTIME.notna()]
icu = icu.loc[icu.OUTTIME.notna()]

# Filter out pediatric patients.
pat = pd.read_csv(mimic_data_dir+'PATIENTS.csv', usecols=['SUBJECT_ID', 'DOB', 'DOD', 'GENDER'])
icu = icu.merge(pat, on='SUBJECT_ID', how='left')
icu['INTIME'] = pd.to_datetime(icu.INTIME)
icu['DOB'] = pd.to_datetime(icu.DOB)
icu['AGE'] = icu.INTIME.map(lambda x:x.year) - icu.DOB.map(lambda x:x.year)
icu = icu.loc[icu.AGE>=18] #53k icustays

# Observation
# Extract chartevents for icu stays.
ch = []
for chunk in tqdm(pd.read_csv(mimic_data_dir+'CHARTEVENTS.csv', chunksize=10000000,
                usecols = ['HADM_ID', 'ICUSTAY_ID', 'ITEMID', 'CHARTTIME', 'VALUE', 'VALUENUM', 'VALUEUOM', 'ERROR'])):
    chunk = chunk.loc[chunk.ICUSTAY_ID.isin(icu.ICUSTAY_ID)]
    chunk = chunk.loc[chunk['ERROR']!=1]
    chunk = chunk.loc[chunk.CHARTTIME.notna()]
    chunk.drop(columns=['ERROR'], inplace=True)
    ch.append(chunk)
del chunk
print ('Done')
ch = pd.concat(ch)
print ('Done')
ch = ch.loc[~(ch.VALUE.isna() & ch.VALUENUM.isna())]
ch['TABLE'] = 'chart'
print ('Done')

# Extract labevents for admissions.
la = pd.read_csv(mimic_data_dir+'LABEVENTS.csv', usecols = ['HADM_ID', 'ITEMID', 'CHARTTIME', 'VALUE', 'VALUENUM', 'VALUEUOM'])
la = la.loc[la.HADM_ID.isin(icu.HADM_ID)]
la.HADM_ID = la.HADM_ID.astype(int)
la = la.loc[la.CHARTTIME.notna()]
la = la.loc[~(la.VALUE.isna() & la.VALUENUM.isna())]
la['ICUSTAY_ID'] = np.nan
la['TABLE'] = 'lab'

# blood pressure
print("blood pressure")
# Extract bp events. Remove outliers. Make sure median values of CareVue and MetaVision items are close.
dbp = [8368, 220051, 225310, 8555, 8441, 220180, 8502, 8440, 8503, 8504, 8507, 8506, 224643, 227242]
sbp = [51, 220050, 225309, 6701, 455, 220179, 3313, 3315, 442, 3317, 3323, 3321, 224167, 227243]
mbp = [52, 220052, 225312, 224, 6702, 224322, 456, 220181, 3312, 3314, 3316, 3322, 3320, 443]
ch_bp = ch.loc[ch.ITEMID.isin(dbp+sbp+mbp)]
ch_bp = ch_bp.loc[(ch_bp.VALUENUM>=0)&(ch_bp.VALUENUM<=375)]
ch_bp.loc[ch_bp.ITEMID.isin(dbp), 'NAME'] = 'DBP'
ch_bp.loc[ch_bp.ITEMID.isin(sbp), 'NAME'] = 'SBP'
ch_bp.loc[ch_bp.ITEMID.isin(mbp), 'NAME'] = 'MBP'
ch_bp['VALUEUOM'] = 'mmHg'
ch_bp['VALUE'] = None
events = ch_bp.copy()
del ch_bp

# Extract GCS events. Checked for outliers.
print("GCS")
gcs_eye = [184, 220739]
gcs_motor = [454, 223901]
gcs_verbal = [723, 223900]
ch_gcs = ch.loc[ch.ITEMID.isin(gcs_eye+gcs_motor+gcs_verbal)]
ch_gcs.loc[ch_gcs.ITEMID.isin(gcs_eye+gcs_motor+gcs_verbal), 'NAME'] = 'GCS'
ch_gcs['VALUEUOM'] = None
ch_gcs['VALUE'] = None
events = pd.concat([events, ch_gcs])
del ch_gcs

# Extract heart_rate events. Remove outliers.
print("heart_rate")
hr = [211, 220045]
ch_hr = ch.loc[ch.ITEMID.isin(hr)]
ch_hr = ch_hr.loc[(ch_hr.VALUENUM>=0)&(ch_hr.VALUENUM<=390)]
ch_hr['NAME'] = 'HR'
ch_hr['VALUEUOM'] = 'bpm'
ch_hr['VALUE'] = None
events = pd.concat([events, ch_hr])
del ch_hr

# Extract respiratory_rate events. Remove outliers. Checked unit consistency.
print("respiratory_rate")
rr = [618, 220210, 3603, 224689, 614, 651, 224422, 615, 224690, 619, 224688, 227860, 227918]
ch_rr = ch.loc[ch.ITEMID.isin(rr)]
ch_rr = ch_rr.loc[(ch_rr.VALUENUM>=0)&(ch_rr.VALUENUM<=330)]
ch_rr['NAME'] = 'RR'
ch_rr['VALUEUOM'] = 'brpm'
ch_rr['VALUE'] = None
events = pd.concat([events, ch_rr])
del ch_rr

# Extract temperature events. Convert F to C. Remove outliers.
print("temperature")
temp_c = [3655, 677, 676, 223762]
temp_f = [223761, 678, 679, 3654]
ch_temp_c = ch.loc[ch.ITEMID.isin(temp_c)]
ch_temp_f = ch.loc[ch.ITEMID.isin(temp_f)]
ch_temp_f.VALUENUM = (ch_temp_f.VALUENUM-32)*5/9
ch_temp = pd.concat([ch_temp_c, ch_temp_f])
del ch_temp_c
del ch_temp_f
ch_temp = ch_temp.loc[(ch_temp.VALUENUM>=14.2)&(ch_temp.VALUENUM<=47)]
ch_temp['NAME'] = 'Temperature'
ch_temp['VALUEUOM'] = 'C'
ch_temp['VALUE'] = None
events = pd.concat([events, ch_temp])
del ch_temp


# Extract fio2 events. Convert % to fraction. Remove outliers.
print("fio2")
fio2 = [3420, 223835, 3422, 189, 727, 190]
ch_fio2 = ch.loc[ch.ITEMID.isin(fio2)]
idx = ch_fio2.VALUENUM>1.0
ch_fio2.loc[idx, 'VALUENUM'] = ch_fio2.loc[idx, 'VALUENUM'] / 100
ch_fio2 = ch_fio2.loc[(ch_fio2.VALUENUM>=0.2)&(ch_fio2.VALUENUM<=1)]
ch_fio2['NAME'] = 'FiO2'
ch_fio2['VALUEUOM'] = None
ch_fio2['VALUE'] = None
events = pd.concat([events, ch_fio2])
del ch_fio2

# Extract capillary refill rate events. Convert to binary.
print("capillary refill rate")
cr = [3348, 115, 8377, 224308, 223951]
ch_cr = ch.loc[ch.ITEMID.isin(cr)]
ch_cr = ch_cr.loc[~(ch_cr.VALUE=='Other/Remarks')]
idx = (ch_cr.VALUE=='Normal <3 Seconds')|(ch_cr.VALUE=='Normal <3 secs')
ch_cr.loc[idx, 'VALUENUM'] = 1
idx = (ch_cr.VALUE=='Abnormal >3 Seconds')|(ch_cr.VALUE=='Abnormal >3 secs')
ch_cr.loc[idx, 'VALUENUM'] = 2
ch_cr['VALUEUOM'] = None
ch_cr['NAME'] = 'CRR'
events = pd.concat([events, ch_cr])
del ch_cr

# Extract glucose events. Remove outliers.
print("glucose")
gl = [807,811,1529,3745,3744,225664,220621,226537]
ev_gl = pd.concat((ch.loc[ch.ITEMID.isin(gl)], la.loc[la.ITEMID.isin(gl)]))
ev_gl = ev_gl.loc[(ev_gl.VALUENUM>=0)&(ev_gl.VALUENUM<=2200)]
ev_gl['NAME'] = 'Glucose'
ev_gl['VALUEUOM'] = 'mg/dL'
ev_gl['VALUE'] = None
events = pd.concat([events, ev_gl])
del ev_gl

# Extract bilirubin events. Remove outliers.
print("bilirubin")
br_to = [50885]
ev_br = pd.concat((ch.loc[ch.ITEMID.isin(br_to)], la.loc[la.ITEMID.isin(br_to)]))
ev_br = ev_br.loc[(ev_br.VALUENUM>=0)&(ev_br.VALUENUM<=66)]
ev_br.loc[ev_br.ITEMID.isin(br_to), 'NAME'] = 'Bilirubin'
ev_br['VALUEUOM'] = 'mg/dL'
ev_br['VALUE'] = None
events = pd.concat([events, ev_br])
del ev_br

# Extract intubated events.
print("intubated")
itb = [50812]
la_itb = la.loc[la.ITEMID.isin(itb)]
idx = (la_itb.VALUE=='INTUBATED')
la_itb.loc[idx, 'VALUENUM'] = 1
idx = (la_itb.VALUE=='NOT INTUBATED')
la_itb.loc[idx, 'VALUENUM'] = 0
la_itb['VALUEUOM'] = None
la_itb['NAME'] = 'Intubated'
events = pd.concat([events, la_itb])
del la_itb

# Extract multiple events. Remove outliers.
print("multiple events")
o2sat = [834, 50817, 8498, 220227, 646, 220277]
sod = [50983, 50824]
pot = [50971, 50822]
mg = [50960]
po4 = [50970]
ca_total = [50893]
ca_free = [50808]
wbc = [51301, 51300]
hct = [50810, 51221]
hgb = [51222, 50811]
cl = [50902, 50806]
bic = [50882, 50803]
alt = [50861]
alp = [50863]
ast = [50878]
alb = [50862]
lac = [50813]
ld = [50954]
usg = [51498]
ph_ur = [51491, 51094, 220734, 1495, 1880, 1352, 6754, 7262]
ph_bl = [50820]
po2 = [50821]
pco2 = [50818]
tco2 = [50804]
be = [50802]
monos = [51254]
baso = [51146]
eos = [51200]
neuts = [51256]
lym_per = [51244, 51245]
lym_abs = [51133]
pt = [51274]
ptt = [51275]
inr = [51237]
agap = [50868]
bun = [51006]
cr_bl = [50912]
cr_ur = [51082]
mch = [51248]
mchc = [51249]
mcv = [51250]
rdw = [51277]
plt = [51265]
rbc = [51279]

# 44
features = {'O2 Saturation': [o2sat, [0,100], '%'],
            'Sodium': [sod, [0,250], 'mEq/L'], 
            'Potassium': [pot, [0,15], 'mEq/L'], 
            'Magnesium': [mg, [0,22], 'mg/dL'], 
            'Phosphate': [po4, [0,22], 'mg/dL'],
            'Calcium Total': [ca_total, [0,40], 'mg/dL'],
            'Calcium Free': [ca_free, [0,10], 'mmol/L'],
            'WBC': [wbc, [0,1100], 'K/uL'], 
            'Hct': [hct, [0,100], '%'], 
            'Hgb': [hgb, [0,30], 'g/dL'], 
            'Chloride': [cl, [0,200], 'mEq/L'],
            'Bicarbonate': [bic, [0,66], 'mEq/L'],
            'ALT': [alt, [0,11000], 'IU/L'],
            'ALP': [alp, [0,4000], 'IU/L'],
            'AST': [ast, [0,22000], 'IU/L'],
            'Albumin': [alb, [0,10], 'g/dL'],
            'Lactate': [lac, [0,33], 'mmol/L'],
            'LDH': [ld, [0,35000], 'IU/L'],
            'SG Urine': [usg, [0,2], ''],
            'pH Urine': [ph_ur, [0,14], ''],
            'pH Blood': [ph_bl, [0,14], ''],
            'PO2': [po2, [0,770], 'mmHg'],
            'PCO2': [pco2, [0,220], 'mmHg'],
            'Total CO2': [tco2, [0,65], 'mEq/L'],
            'Base Excess': [be, [-31, 28], 'mEq/L'],
            'Monocytes': [monos, [0,100], '%'],
            'Basophils': [baso, [0,100], '%'],
            'Eoisinophils': [eos, [0,100], '%'],
            'Neutrophils': [neuts, [0,100], '%'],
            'Lymphocytes': [lym_per, [0,100], '%'],
            'Lymphocytes (Absolute)': [lym_abs, [0,25000], '#/uL'],
            'PT': [pt, [0,150], 'sec'],
            'PTT': [ptt, [0,150], 'sec'],
            'INR': [inr, [0,150], ''],
            'Anion Gap': [agap, [0,55], 'mg/dL'],
            'BUN': [bun, [0,275], 'mEq/L'],
            'Creatinine Blood': [cr_bl, [0,66], 'mg/dL'],
            'Creatinine Urine': [cr_ur, [0,650], 'mg/dL'],
            'MCH': [mch, [0,50], 'pg'],
            'MCHC': [mchc, [0,50], '%'],
            'MCV': [mcv, [0,150], 'fL'],
            'RDW': [rdw, [0,37], '%'],
            'Platelet Count': [plt, [0,2200], 'K/uL'],
            'RBC': [rbc, [0,14], 'm/uL']
            }

for k, v in features.items():
    print (k)
    ev_k = pd.concat((ch.loc[ch.ITEMID.isin(v[0])], la.loc[la.ITEMID.isin(v[0])]))
    ev_k = ev_k.loc[(ev_k.VALUENUM>=v[1][0])&(ev_k.VALUENUM<=v[1][1])]
    ev_k['NAME'] = k
    ev_k['VALUEUOM'] = v[2]
    ev_k['VALUE'] = None
    assert (ev_k.VALUENUM.isna().sum()==0)
    events = pd.concat([events, ev_k])
del ev_k

# Extract mechanical ventilation. Convert to binary.
print("mechanical ventilation")
vent1 = [467]
vent = [720, 223848, 223849, 445, 448, 449, 450, 1340, 1486, 1600, 224687 ,639, 654, 681, 682, 683, 684, 224685, 224684, 224686, 218, 436, 535, 444, 459, 224697, 224695, 224696, 224746, 224747, 221, 1, 1211, 1655, 2000, 226873, 224738, 224419, 224750, 227187, 543, 5865, 5866, 224707, 224709, 224705, 224706 , 60, 437, 505, 506, 686, 220339, 224700 , 3459 , 501, 502, 503, 224702 , 223, 667, 668, 669, 670, 671, 672 , 224701]
ch_vent1 = ch.loc[ch.ITEMID.isin(vent1)|(ch.VALUE=='Ventilator')]
ch_vent = ch.loc[ch.ITEMID.isin(vent)]
ch_vent = ch_vent.loc[~(ch_vent.VALUE=='Other/Remarks')|~(ch_vent.VALUE=='Other')]
ch_vent = pd.concat([ch_vent1, ch_vent])
ch_vent['VALUENUM'] = 1
ch_vent['NAME'] = 'Mechanical Ventilation'
ch_vent['VALUEUOM'] = None
ch_vent['VALUE'] = None
events = pd.concat([events, ch_vent])
del ch_vent

# Free some memory.
del ch, la

# Extract outputevents.
oe = pd.read_csv(mimic_data_dir+'OUTPUTEVENTS.csv', usecols = ['ICUSTAY_ID', 'HADM_ID', 'ITEMID', 'CHARTTIME', 'VALUE', 'VALUEUOM'])
oe = oe.loc[oe.VALUE.notna()]
oe['VALUENUM'] = oe.VALUE
oe.VALUE = None
oe = oe.loc[oe.ICUSTAY_ID.isin(icu.ICUSTAY_ID)]
oe.ICUSTAY_ID = oe.ICUSTAY_ID.astype(int)
oe['TABLE'] = 'output'

# # Extract information about output items from D_ITEMS.csv.
items = pd.read_csv(mimic_data_dir+'D_ITEMS.csv', usecols=['ITEMID', 'LABEL', 'ABBREVIATION', 'UNITNAME', 'PARAM_TYPE'])
items.loc[items.LABEL.isna(), 'LABEL'] = ''
items.LABEL = items.LABEL.str.lower()
oeitems = oe[['ITEMID']].drop_duplicates()
oeitems = oeitems.merge(items, on='ITEMID', how='left')

uf = [40286]
print ('Ultrafiltrate')
ev_k = oe.loc[oe.ITEMID.isin(uf)]
ind = (ev_k.VALUENUM>=0)&(ev_k.VALUENUM<=7000)
med = ev_k.VALUENUM.loc[ind].median()
ev_k.loc[~ind, 'VALUENUM'] = med
ev_k['NAME'] = 'Ultrafiltrate'
ev_k['VALUEUOM'] = 'mL'
events = pd.concat([events, ev_k])
del ev_k

# # Extract multiple events. Replace outliers with median.
print("multiple output events")
keys = ['urine', 'foley', 'void', 'nephrostomy', 'condom', 'drainage bag']
cond = pd.concat([oeitems.LABEL.str.contains(k) for k in keys], axis=1).any(axis='columns')
ur = list(oeitems.loc[cond].ITEMID)
keys = ['stool', 'fecal', 'colostomy', 'ileostomy', 'rectal']
cond = pd.concat([oeitems.LABEL.str.contains(k) for k in keys], axis=1).any(axis='columns')
st = list(oeitems.loc[cond].ITEMID)
ct = list(oeitems.loc[oeitems.LABEL.str.contains('chest tube')].ITEMID) + [226593, 226590, 226591, 226595, 226592]
gs = [40059, 40052, 226576, 226575, 226573, 40051, 226630]
ebl = [40064, 226626, 40491, 226629]
em = [40067, 226571, 40490, 41015, 40427]
jp = list(oeitems.loc[oeitems.LABEL.str.contains('jackson')].ITEMID)
res = [227510, 227511, 42837, 43892, 44909, 44959]
pre = [40060, 226633]

# # 10
features = {'Urine': [ur, [0,2500], 'mL'],
            'Stool': [st, [0,4000], 'mL'],
            'Chest Tube': [ct, [0,2500], 'mL'],
            'Gastric': [gs, [0,4000], 'mL'],
            'EBL': [ebl, [0,10000], 'mL'],
#             'Pre-admission': [pre, [0,13000], 'mL'], # Repeated by mistake.
            'Emesis': [em, [0,2000], 'mL'],
            'Jackson-Pratt': [jp, [0,2000], 'ml'],
            'Residual': [res, [0, 1050], 'mL'],
            'Pre-admission Output': [pre, [0, 13000], 'ml']
            }

for k, v in features.items():
    print (k)
    ev_k = oe.loc[oe.ITEMID.isin(v[0])]
    ind = (ev_k.VALUENUM>=v[1][0])&(ev_k.VALUENUM<=v[1][1])
    med = ev_k.VALUENUM.loc[ind].median()
    ev_k.loc[~ind, 'VALUENUM'] = med
    ev_k['NAME'] = k
    ev_k['VALUEUOM'] = v[2]
    events = pd.concat([events, ev_k])
del ev_k


# Extract CV and MV inputevents.
ie_cv = pd.read_csv(mimic_data_dir+'INPUTEVENTS_CV.csv',
    usecols = ['ICUSTAY_ID', 'HADM_ID', 'ITEMID', 'CHARTTIME', 
               'AMOUNT', 'AMOUNTUOM'])
ie_cv['TABLE'] = 'input_cv'
ie_cv = ie_cv.loc[ie_cv.AMOUNT.notna()]
ie_cv = ie_cv.loc[ie_cv.ICUSTAY_ID.isin(icu.ICUSTAY_ID)]
ie_cv.CHARTTIME = pd.to_datetime(ie_cv.CHARTTIME)

ie_mv = pd.read_csv(mimic_data_dir+'INPUTEVENTS_MV.csv',
    usecols = ['ICUSTAY_ID', 'HADM_ID', 'ITEMID', 'STARTTIME', 'ENDTIME',
               'AMOUNT', 'AMOUNTUOM'])
ie_mv = ie_mv.loc[ie_mv.ICUSTAY_ID.isin(icu.ICUSTAY_ID)]

# Split MV intervals hourly.
ie_mv.STARTTIME = pd.to_datetime(ie_mv.STARTTIME)
ie_mv.ENDTIME = pd.to_datetime(ie_mv.ENDTIME)
ie_mv['TD'] = ie_mv.ENDTIME - ie_mv.STARTTIME
new_ie_mv = ie_mv.loc[ie_mv.TD<=pd.Timedelta(1,'h')].drop(columns=['STARTTIME', 'TD'])
ie_mv = ie_mv.loc[ie_mv.TD>pd.Timedelta(1,'h')]
new_rows = []
for _,row in tqdm(ie_mv.iterrows()):
    admid, icuid, iid, amo, uom, stm, td = row.HADM_ID, row.ICUSTAY_ID, row.ITEMID, row.AMOUNT, row.AMOUNTUOM, row.STARTTIME, row.TD
    td = td.total_seconds()/60
    num_hours = td // 60
    hour_amount = 60*amo/td
    for i in range(1,int(num_hours)+1):
        new_rows.append([admid, icuid, iid, stm+pd.Timedelta(i,'h'), hour_amount, uom])
    rem_mins = td % 60
    if rem_mins>0:
        new_rows.append([admid, icuid, iid, row['ENDTIME'], rem_mins*amo/td, uom])
new_rows = pd.DataFrame(new_rows, columns=['HADM_ID', 'ICUSTAY_ID', 'ITEMID', 'ENDTIME', 'AMOUNT', 'AMOUNTUOM'])
new_ie_mv = pd.concat((new_ie_mv, new_rows))
ie_mv = new_ie_mv.copy()
del new_ie_mv
ie_mv['TABLE'] = 'input_mv' 
ie_mv.rename(columns={'ENDTIME':'CHARTTIME'}, inplace=True)

# Combine CV and MV inputevents.
ie = pd.concat((ie_cv, ie_mv))
del ie_cv, ie_mv
ie.rename(columns={'AMOUNT':'VALUENUM', 'AMOUNTUOM':'VALUEUOM'}, inplace=True)
events.CHARTTIME = pd.to_datetime(events.CHARTTIME)

# Convert mcg->mg, L->ml.
ind = (ie.VALUEUOM=='mcg')
ie.loc[ind, 'VALUENUM'] = ie.loc[ind, 'VALUENUM']*0.001
ie.loc[ind, 'VALUEUOM'] = 'mg'
ind = (ie.VALUEUOM=='L')
ie.loc[ind, 'VALUENUM'] = ie.loc[ind, 'VALUENUM']*1000
ie.loc[ind, 'VALUEUOM'] = 'ml'

# Extract Vasopressin events. Remove outliers.
print("Vasopressin")
vaso = [30051, 222315]
ev_vaso = ie.loc[ie.ITEMID.isin(vaso)]
ind1 = (ev_vaso.VALUENUM==0)
ind2 = ev_vaso.VALUEUOM.isin(['U','units'])
ind3 = (ev_vaso.VALUENUM>=0)&(ev_vaso.VALUENUM<=400)
ind = ((ind2&ind3)|ind1)
med = ev_vaso.VALUENUM.loc[ind].median()
ev_vaso.loc[~ind, 'VALUENUM'] = med
ev_vaso['VALUEUOM'] = 'units'
ev_vaso['NAME'] = 'Vasopressin'
events = pd.concat([events, ev_vaso])
del ev_vaso

# Extract Vancomycin events. Convert dose,g to mg. Remove outliers.
print("Vancomycin")
vanc = [225798]
ev_vanc = ie.loc[ie.ITEMID.isin(vanc)]
ind = ev_vanc.VALUEUOM.isin(['mg'])
ev_vanc.loc[ind, 'VALUENUM'] = ev_vanc.loc[ind, 'VALUENUM']*0.001 
ev_vanc['VALUEUOM'] = 'g'
ind = (ev_vanc.VALUENUM>=0)&(ev_vanc.VALUENUM<=8)
med = ev_vanc.VALUENUM.loc[ind].median()
ev_vanc.loc[~ind, 'VALUENUM'] = med
ev_vanc['NAME'] = 'Vacomycin'
events = pd.concat([events, ev_vanc])
del ev_vanc

# Extract Calcium Gluconate events. Convert units. Remove outliers.
print("Calcium Gluconate")
cagl = [30023, 221456, 227525, 42504, 43070, 45699, 46591, 44346, 46291]
ev_cagl = ie.loc[ie.ITEMID.isin(cagl)]
ind = ev_cagl.VALUEUOM.isin(['mg'])
ev_cagl.loc[ind, 'VALUENUM'] = ev_cagl.loc[ind, 'VALUENUM']*0.001 
ind1 = (ev_cagl.VALUENUM==0)
ind2 = ev_cagl.VALUEUOM.isin(['mg', 'gm', 'grams'])
ind3 = (ev_cagl.VALUENUM>=0)&(ev_cagl.VALUENUM<=200)
ind = (ind2&ind3)|ind1
med = ev_cagl.VALUENUM.loc[ind].median()
ev_cagl.loc[~ind, 'VALUENUM'] = med
ev_cagl['VALUEUOM'] = 'g'
ev_cagl['NAME'] = 'Calcium Gluconate'
events = pd.concat([events, ev_cagl])
del ev_cagl

# Extract Furosemide events. Remove outliers.
print("Furosemide")
furo = [30123, 221794, 228340]
ev_furo = ie.loc[ie.ITEMID.isin(furo)]
ind1 = (ev_furo.VALUENUM==0)
ind2 = (ev_furo.VALUEUOM=='mg')
ind3 = (ev_furo.VALUENUM>=0)&(ev_furo.VALUENUM<=250)
ind = ind1|(ind2&ind3)
med = ev_furo.VALUENUM.loc[ind].median()
ev_furo.loc[~ind, 'VALUENUM'] = med
ev_furo['VALUEUOM'] = 'mg'
ev_furo['NAME'] = 'Furosemide'
events = pd.concat([events, ev_furo])
del ev_furo

# Extract Famotidine events. Remove outliers.
print("Famotidine")
famo = [225907]
ev_famo = ie.loc[ie.ITEMID.isin(famo)]
ind1 = (ev_famo.VALUENUM==0)
ind2 = (ev_famo.VALUEUOM=='dose')
ind3 = (ev_famo.VALUENUM>=0)&(ev_famo.VALUENUM<=1)
ind = ind1|(ind2&ind3)
med = ev_famo.VALUENUM.loc[ind].median()
ev_famo.loc[~ind, 'VALUENUM'] = med
ev_famo['VALUEUOM'] = 'dose'
ev_famo['NAME'] = 'Famotidine'
events = pd.concat([events, ev_famo])
del ev_famo

# Extract Piperacillin events. Convert units. Remove outliers.
print("Piperacillin")
pipe = [225893, 225892]
ev_pipe = ie.loc[ie.ITEMID.isin(pipe)]
ind1 = (ev_pipe.VALUENUM==0)
ind2 = (ev_pipe.VALUEUOM=='dose')
ind3 = (ev_pipe.VALUENUM>=0)&(ev_pipe.VALUENUM<=1)
ind = ind1|(ind2&ind3)
med = ev_pipe.VALUENUM.loc[ind].median()
ev_pipe.loc[~ind, 'VALUENUM'] = med
ev_pipe['VALUEUOM'] = 'dose'
ev_pipe['NAME'] = 'Piperacillin'
events = pd.concat([events, ev_pipe])
del ev_pipe

# Extract Cefazolin events. Convert units. Remove outliers.
print("Cefazolin")
cefa = [225850]
ev_cefa = ie.loc[ie.ITEMID.isin(cefa)]
ind1 = (ev_cefa.VALUENUM==0)
ind2 = (ev_cefa.VALUEUOM=='dose')
ind3 = (ev_cefa.VALUENUM>=0)&(ev_cefa.VALUENUM<=2)
ind = ind1|(ind2&ind3)
med = ev_cefa.VALUENUM.loc[ind].median()
ev_cefa.loc[~ind, 'VALUENUM'] = med 
ev_cefa['VALUEUOM'] = 'dose'
ev_cefa['NAME'] = 'Cefazolin'
events = pd.concat([events, ev_cefa])
del ev_cefa

# Extract Fiber events. Remove outliers.
print("Fiber")
fibe = [225936, 30166, 30073, 227695, 30088, 225928, 226051, 226050, 226048, 45381, 45597, 227699, 227696, 44218, 45406, 44675, 226049, 44202, 45370, 227698, 226027, 42106, 43994, 45865, 44318, 42091, 44699, 44010, 43134, 44045, 43088, 42641, 45691, 45515, 45777, 42663, 42027, 44425, 45657, 45775, 44631, 44106, 42116, 44061, 44887, 42090, 42831, 45541, 45497, 46789, 44765, 42050]
ev_fibe = ie.loc[ie.ITEMID.isin(fibe)]
ind1 = (ev_fibe.VALUENUM==0)
ind2 = (ev_fibe.VALUEUOM=='ml')
ind3 = (ev_fibe.VALUENUM>=0)&(ev_fibe.VALUENUM<=1600)
ind = ind1|(ind2&ind3)
med = ev_fibe.VALUENUM.loc[ind].median()
ev_fibe.loc[~ind, 'VALUENUM'] = med 
ev_fibe['NAME'] = 'Fiber'
ev_fibe['VALUEUOM'] = 'ml'
events = pd.concat([events, ev_fibe])
del ev_fibe

# Extract Pantoprazole events. Remove outliers.
print("Pantoprazole")
pant = [225910, 40549, 41101, 41583, 44008, 40700, 40550]
ev_pant = ie.loc[ie.ITEMID.isin(pant)]
ind = (ev_pant.VALUENUM>0)
ev_pant.loc[ind, 'VALUENUM'] = 1
ind = (ev_pant.VALUENUM>=0)
med = ev_pant.VALUENUM.loc[ind].median()
ev_pant.loc[~ind, 'VALUENUM'] = med
ev_pant['NAME'] = 'Pantoprazole'
ev_pant['VALUEUOM'] = 'dose'
events = pd.concat([events, ev_pant])
del ev_pant

# Extract Magnesium Sulphate events. Remove outliers.
print("Magnesium Sulphate")
masu = [222011, 30027, 227524]
ev_masu = ie.loc[ie.ITEMID.isin(masu)]
ind = (ev_masu.VALUEUOM=='mg')
ev_masu.loc[ind, 'VALUENUM'] = ev_masu.loc[ind, 'VALUENUM']*0.001
ind1 = (ev_masu.VALUENUM==0)
ind2 = ev_masu.VALUEUOM.isin(['gm', 'grams', 'mg'])
ind3 = (ev_masu.VALUENUM>=0)&(ev_masu.VALUENUM<=125)
ind = ind1|(ind2&ind3)
med = ev_masu.VALUENUM.loc[ind].median()
ev_masu.loc[~ind, 'VALUENUM'] = med 
ev_masu['VALUEUOM'] = 'g'
ev_masu['NAME'] = 'Magnesium Sulphate'
events = pd.concat([events, ev_masu])
del ev_masu

# Extract Potassium Chloride events. Remove outliers.
print("Potassium Chloride")
poch = [30026, 225166, 227536]
ev_poch = ie.loc[ie.ITEMID.isin(poch)]
ind1 = (ev_poch.VALUENUM==0)
ind2 = ev_poch.VALUEUOM.isin(['mEq', 'mEq.'])
ind3 = (ev_poch.VALUENUM>=0)&(ev_poch.VALUENUM<=501)
ind = ind1|(ind2&ind3)
med = ev_poch.VALUENUM.loc[ind].median()
ev_poch.loc[~ind, 'VALUENUM'] = med 
ev_poch['VALUEUOM'] = 'mEq'
ev_poch['NAME'] = 'KCl'
events = pd.concat([events, ev_poch])
del ev_poch

# Extract multiple events. Remove outliers.
print("multiple input events")
mida = [30124, 221668]
prop = [30131, 222168]
albu25 = [220862, 30009]
albu5 = [220864, 30008]
ffpl = [30005, 220970]
lora = [30141, 221385]
mosu = [30126, 225154]
game = [30144, 225799]
lari = [30021, 225828]
milr = [30125, 221986]
crys = [30101, 226364, 30108, 226375]
hepa = [30025, 225975, 225152]
prbc = [30001, 225168, 30104, 226368, 227070]
poin = [30056, 226452, 30109, 226377]
neos = [30128, 221749, 30127]
pigg = [226089, 30063]
nigl = [30121, 222056, 30049]
nipr = [30050, 222051]
meto = [225974]
nore = [30120, 221906, 30047]
coll = [30102, 226365, 30107, 226376]
hyzi = [221828]
gtfl = [226453, 30059]
hymo = [30163, 221833]
fent = [225942, 30118, 221744, 30149]
inre = [30045, 223258, 30100]
inhu = [223262]
ingl = [223260]
innp = [223259]
nana = [30140]
d5wa = [30013, 220949]
doth = [30015, 225823, 30060, 225825, 220950, 30016, 30061, 225827, 225941, 30160, 220952, 30159, 30014, 30017, 228142, 228140, 45360, 228141, 41550]
nosa = [225158, 30018]
hans = [30020, 225159]
stwa = [225944, 30065]
frwa = [30058, 225797, 41430, 40872, 41915, 43936, 41619, 42429, 44492, 46169, 42554]
solu = [225943]
dopa = [30043, 221662]
epin = [30119, 221289, 30044]
amio = [30112, 221347, 228339, 45402]
tpnu = [30032, 225916, 225917, 30096]
msbo = [227523]
pcbo = [227522]
prad = [30054, 226361]

# 43
features = {'Midazolam': [mida, [0, 500], 'mg'],
            'Propofol': [prop, [0, 12000], 'mg'],
            'Albumin 25%': [albu25, [0, 750], 'ml'],
            'Albumin 5%': [albu5, [0, 1300], 'ml'],
            'Fresh Frozen Plasma': [ffpl, [0, 33000], 'ml'],
            'Lorazepam': [lora, [0, 300], 'mg'],
            'Morphine Sulfate': [mosu, [0, 4000], 'mg'],
            'Gastric Meds': [game, [0, 7000], 'ml'],
            'Lactated Ringers': [lari, [0, 17000], 'ml'],
            'Milrinone': [milr, [0, 50], 'ml'],
            'OR/PACU Crystalloid': [crys, [0, 22000], 'ml'],
            'Packed RBC': [prbc, [0, 17250], 'ml'],
            'PO intake': [poin, [0, 11000], 'ml'],
            'Neosynephrine': [neos, [0, 1200], 'mg'],
            'Piggyback': [pigg, [0, 1000], 'ml'],
            'Nitroglycerine': [nigl, [0, 350], 'mg'],
            'Nitroprusside': [nipr, [0, 430], 'mg'],
            'Metoprolol': [meto, [0, 151], 'mg'],
            'Norepinephrine': [nore, [0, 80], 'mg'],
            'Colloid': [coll, [0, 20000], 'ml'],
            'Hydralazine': [hyzi, [0, 80], 'mg'],
            'GT Flush': [gtfl, [0, 2100], 'ml'],
            'Hydromorphone': [hymo, [0, 125], 'mg'],
            'Fentanyl': [fent, [0, 20], 'mg'],
            'Insulin Regular': [inre, [0, 1500], 'units'],
            'Insulin Humalog': [inhu, [0, 340], 'units'],
            'Insulin largine': [ingl, [0, 150], 'units'],
            'Insulin NPH': [innp, [0, 100], 'units'],
            'Unknown': [nana, [0, 1100], 'ml'],
            'D5W': [d5wa, [0,11000], 'ml'],
            'Dextrose Other': [doth, [0,4000], 'ml'],
            'Normal Saline': [nosa, [0, 11000], 'ml'],
            'Half Normal Saline': [hans, [0, 2000], 'ml'],
            'Sterile Water': [stwa, [0, 10000], 'ml'],
            'Free Water': [frwa, [0, 2500], 'ml'],
            'Solution': [solu, [0, 1500], 'ml'],
            'Dopamine': [dopa, [0, 1300], 'mg'],
            'Epinephrine': [epin, [0, 100], 'mg'],
            'Amiodarone': [amio, [0, 1200], 'mg'],
            'TPN': [tpnu, [0, 1600], 'ml'],
            'Magnesium Sulfate (Bolus)': [msbo, [0, 250], 'ml'],
            'KCl (Bolus)': [pcbo, [0, 500], 'ml'],
            'Pre-admission Intake': [prad, [0, 30000], 'ml']
            }

for k, v in features.items():
    print (k)
    ev_k = ie.loc[ie.ITEMID.isin(v[0])]
    ind = (ev_k.VALUENUM>=v[1][0])&(ev_k.VALUENUM<=v[1][1])
    med = ev_k.VALUENUM.loc[ind].median()
    ev_k.loc[~ind, 'VALUENUM'] = med
    ev_k['NAME'] = k
    ev_k['VALUEUOM'] = v[2]
    events = pd.concat([events, ev_k])
del ev_k

# Extract heparin events. (Missed earlier.)
print("heparin")
ev_k = ie.loc[ie.ITEMID.isin(hepa)]
ind1 = ev_k.VALUEUOM.isin(['U', 'units'])
ind2 = (ev_k.VALUENUM>=0)&(ev_k.VALUENUM<=25300)
ind = (ind1&ind2)
med = ev_k.VALUENUM.loc[ind].median()
ev_k.loc[~ind, 'VALUENUM'] = med
ev_k['NAME'] = 'Heparin'
ev_k['VALUEUOM'] = 'units'
events = pd.concat([events, ev_k])
del ev_k

# Save data.
print("saving done")
events.to_csv('mimic_iii_events.csv', index=False)
icu.to_csv('mimic_iii_icu.csv', index=False)
print("save done")
